[1] 38 [1] TRUE
#1.Donors #2.Samples
samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "ununstim"] # Includes same donor
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor
metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]
genes_baseline_condition = as.data.frame(genes_counts_ordered[, colnames(genes_counts_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)
#remove samples in genes counts datafile
#genes_counts_cultured <- genes_counts_ordered2[,metadata_cultured$Sample]
experimentBycondition = unique(metadata4deg[,c("Donor_id", "Stimulation")])
#createDT(experimentBycondition)
as.data.frame(t(as.matrix(unclass( table(experimentBycondition$Stimulation, useNA = "ifany"))))) %>%
kable(row.names = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))| unstim | ununstim |
|---|---|
| 61 | 21 |
#genes_counts_cultured <- genes_counts_ordered2[,metadata_cultured$Sample]
experimentBycondition = unique(metadata4deg[,c("Sample", "Stimulation")])
#createDT(experimentBycondition)
as.data.frame(t(as.matrix(unclass( table(experimentBycondition$Stimulation, useNA = "ifany"))))) %>%
kable(row.names = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))| unstim | ununstim |
|---|---|
| 135 | 42 |
# The variable to be tested must be a fixed effect
names(metadata4deg) = tolower(names(metadata4deg))
#not corrected for region!
form <- ~ age + (1|donor_id) + stimulation + sex + picard_pct_ribosomal_bases + picard_pct_mrna_bases + picard_pct_percent_duplication + picard_pct_pf_reads_aligned
# estimate weights using linear mixed model of dream
#vobjDream = voomWithDreamWeights( genes_baseline_condition, form, metadata4deg)
# Fit the dream model on each gene
#fit = (dream( vobjDream, form, metadata4deg ))
# Results generated
#res_cultured_uncultured <- data.frame(topTable(fit, coef='stimulationununstim', number=nrow(genes_counts_cultured), sort.by = "p"), check.names = F)
#save(res_uncultured_cultured, file ="res_uncultured_cultured.Rdata")load("~/Documents/MiGASti/Databases/res_uncultured_cultured.Rdata")
load("~/Downloads/genes_counts_cultured.Rdata")
res_cult <- res_uncultured_cultured
gencode_30 = read.table("~/Documents/MiGASti/Databases/ens.geneid.gencode.v30")
colnames(gencode_30) = c("ensembl","symbol")
res_cult <- tibble::rownames_to_column(res_cult, "ensembl")
res_name_cult <- merge(res_cult, gencode_30, by = "ensembl")
sign_cult <- subset(res_name_cult, adj.P.Val < 0.15)
length(rownames(sign_cult))## [1] 14250
sign_cult <- subset(res_name_cult, adj.P.Val < 0.10)
sign_cultlength(rownames(sign_cult))## [1] 13638
sign_cult <- subset(res_name_cult, adj.P.Val < 0.05)
sign_cultlength(rownames(sign_cult))## [1] 12615
res = res_name_cult
p = ggplot(res, aes(P.Value))
p + geom_density(color="darkblue", fill="lightblue") +
theme_classic() +
ggtitle("FDR Distribution")p = ggplot(res, aes(logFC))
p + geom_density(color = "darkblue", fill = "lightblue") +
theme_classic() +
ggtitle("Fold Change Distribution")plot.data = res
plot.data$id = rownames(plot.data)
data = data.frame(plot.data)
data$P.Value = -log10(data$P.Value)
data$fifteen = as.factor(abs(data$adj.P.Val < 0.05))
ma = ggplot(data, aes(AveExpr, logFC, color = fifteen))
ma + geom_point() +
scale_color_manual(values = c("black", "red"), labels = c ("> 0.05", "< 0.05")) +
labs(title = "MA plot", color = "labels") +
theme_classic()#theme(plot.title = element_text(hjust = 0.5)) + ylim (-10,10) + xlim(-4,22)vp = ggplot(data, aes(logFC, P.Value, color = fifteen))
vp + geom_point() +
scale_color_manual(values = c("black", "red"), labels = c("> 0.05", "< 0.05")) +
labs(title = "Gene Level Volcano Plot", color = "FDR") +
#theme(plot.title = element_text(hjust = 0.5)) +
theme_classic() +
xlim(-10,10) + ylim(0, 10) + ylab("-log10 pvalue")## Warning: Removed 2319 rows containing missing values (geom_point).
load("~/Documents/MiGASti/docs/res_name_LPS2.Rdata")#note. pvalues based on wilcox test of log2(tpm)+1 without additional correction(not based on pvalue DEGs with DREAM)
#set rownames to Sample
genes_tpm_filt = genes_tpm[, !colnames(genes_tpm) %in% exclude]
#Excludes the samples from filters.
#dim(genes_tpm_filt)
metadata_filt = metadata[ !(rownames(metadata) %in% exclude), ]
length(metadata_filt)## [1] 38
gencode_30 <- read.delim("~/Documents/MiGASti/Databases/ens.geneid.gencode.v30")
colnames(gencode_30) = c("ensembl","symbol")
genes_tpm_filt = log2((genes_tpm_filt) + 1)
genes_tpm_filt <- as.data.frame(genes_tpm_filt)
genes_tpm_ordered <- genes_tpm_filt[,rownames(metadata_filt)]
#head(genes_tpm_ordered)
all(rownames(metadata_filt) == colnames (genes_tpm_ordered)) #TRUE## [1] TRUE
samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "ununstim"] # Includes same donor
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor
metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]
genes_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)
#create top 6 genes from DEG
deg_lists = res_name_cult[order(res_name_cult$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)#create dataframe
gene2check = as.data.frame(genes_baseline_condition[top_6$ensembl, ])
#merge dataframe with symbol (gene_id)
gene2check$ensembl = rownames(gene2check)
top_6 <- merge(gencode_30, top_6, by = "ensembl")
gene2check = merge(gene2check, top_6[, c("symbol.y", "ensembl")], by = "ensembl")
names(metadata4deg) = tolower(names(metadata4deg))
#create dataframe for plots
gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol.y"))## Warning in melt(gene2check, id.vars = c("ensembl", "symbol.y")): The melt
## generic in data.table has been passed a data.frame and will attempt to redirect
## to the relevant reshape2 method; please note that reshape2 is deprecated, and
## this redirection is now deprecated as well. To continue using melt methods from
## reshape2 while both libraries are attached, e.g. melt.list, you can prepend the
## namespace like reshape2::melt(gene2check). In the next version, this warning
## will become an error.
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")
# show direction of effect for male versus female
ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
geom_boxplot(notch = F, na.rm = T) +
geom_jitter() +
theme_bw() + facet_wrap(~symbol.y, scales = "free_y") +
ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
res_cult_diff_top = res_name_cult[, c("ensembl", "symbol", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "z.std")]
createDT(res_cult_diff_top)## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html